Set Up

rm(list = ls())
set.seed(2024)
library(tidyverse)
library(here)
library(phyloseq)
library(vegan)
library(rstatix)
theme_set(theme_bw())
max.core <- parallel::detectCores()

ps.rare <- readRDS(here('data','following_study','ps_rarefied.rds')) 
sample_data(ps.rare)$Shannon <- estimate_richness(ps.rare)$Shannon
# transform data into proportion
ps.rare.prop <- ps.rare %>%
  transform_sample_counts(function(x) x/sum(x))

sam <- data.frame(sample_data(ps.rare.prop))

Functions

plot_ord draws ordination plot for different factors using plot_ordination function in phyloseq package.

permanova performs permutational multivariate analysis of variance (PERMANOVA) based on adonis2 function in vegan package.

permdisp performs permutational analysis of multivariate dispersions (PERMDISP) based on betadisper function in vegan package.

plot_ord <- function(data, factor, method, distance){
    data.ord <- ordinate(data, method = method, distance = distance)
    p <- plot_ordination(data, data.ord, color = factor)
    p <- p + stat_ellipse(type = "t",geom = "polygon",alpha = 0)
    p <- p + ggtitle(str_c(factor,method,distance, sep = ' - '))
    print(p)
}

permanova <- function(data, formula, method, permutations=1e4, strata = NULL, core = max.core){
    message('PERMANOVA Model: ', method, '~', formula, '; Strata: ', ifelse(is_null(strata), 'None', as.character(strata)))
    dist.matrix <- phyloseq::distance(data, method=method)
    df <- data.frame(sample_data(data))
    model <- as.formula(paste0('dist.matrix~', formula))
    if (!is_null(strata)) {strata <- df[,strata]}
    result <- adonis2(model,
                      data = df,
                      permutations=permutations,
                      strata = strata,
                      parallel = core,
                      by = 'term',
                      na.action = na.omit)
    return(result)
}
permdisp <- function(data, group, method, permutations=1e4, pairwise = FALSE, core = max.core){
    message('PERMDISP Model: ', method, '~', group)
    dist.matrix <- phyloseq::distance(data, method=method)
    df <- data.frame(sample_data(data))
    beta.disp <- betadisper(dist.matrix, group = df[,group])
    result <- permutest(beta.disp, permutations = permutations, pairwise = pairwise, type = 'centroid')
    return(result)
}

Start

In this section, we want to estimate the effect of different factors on dog microbiomes. We focused on Household, Epileptic.or.Control, Breed.Group..1., Pheno.Y.N, Sex, and Age..months..

We compare the alpha diversity (Shannon index) among different factors using ANOVA.

We compare the beta diversity of different factors using PERMANOVA using the Bray-Curtis and weighted Unifrac distance, and visualized using multi-dimensional scaling.

PERMDISP was used to test the homogeneity of multivariate dispersions among groups.

Household Effect

Alpha Diversity

ggplot(sam,aes(x = as.numeric(Household), y = Shannon, group = Household)) +
    geom_point() + geom_line() + xlab('Household')

anova_test(Shannon~Household, data = sam, type = 1)
## ANOVA Table (type I tests)
## 
##      Effect DFn DFd     F     p p<.05  ges
## 1 Household  48  49 1.599 0.053       0.61

Here we see the Shannon diversity index is significantly different among households.

Beta Diversity

Bray-Curtis distance

permanova(ps.rare.prop, 'Household', 'bray')
## PERMANOVA Model: bray~Household; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs     R2      F    Pr(>F)    
## Household 48  12.2248 0.6893 2.2647 9.999e-05 ***
## Residual  49   5.5104 0.3107                     
## Total     97  17.7352 1.0000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Weighted-Unifrac distance

permanova(ps.rare.prop, 'Household', 'wunifrac')
## PERMANOVA Model: wunifrac~Household; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F    Pr(>F)    
## Household 48  1.90306 0.67767 2.1462 9.999e-05 ***
## Residual  49  0.90517 0.32233                     
## Total     97  2.80823 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Epileptic Effect

Alpha Diversity

ggplot(sam, aes(x = Epileptic.or.Control, y = Shannon)) + 
    geom_boxplot() + geom_jitter(height = 0, width = 0.25)

anova_test(Shannon~Household+Epileptic.or.Control, data = sam, type = 1)
## ANOVA Table (type I tests)
## 
##                 Effect DFn DFd     F     p p<.05   ges
## 1            Household  48  48 1.568 0.061       0.611
## 2 Epileptic.or.Control   1  48 0.067 0.796       0.001

Beta Diversity

Bray-Curtis distance

plot_ord(ps.rare.prop, 'Epileptic.or.Control','MDS','bray')

plot_ord(ps.rare.prop, 'Epileptic.or.Control','NMDS','bray')
## Run 0 stress 0.2089049 
## Run 1 stress 0.2200629 
## Run 2 stress 0.2215701 
## Run 3 stress 0.2191062 
## Run 4 stress 0.2105267 
## Run 5 stress 0.233624 
## Run 6 stress 0.2301806 
## Run 7 stress 0.2046962 
## ... New best solution
## ... Procrustes: rmse 0.02758093  max resid 0.2550759 
## Run 8 stress 0.2225389 
## Run 9 stress 0.2040715 
## ... New best solution
## ... Procrustes: rmse 0.03769366  max resid 0.3007093 
## Run 10 stress 0.2213932 
## Run 11 stress 0.2217022 
## Run 12 stress 0.2115462 
## Run 13 stress 0.2106959 
## Run 14 stress 0.2097634 
## Run 15 stress 0.2042154 
## ... Procrustes: rmse 0.03712412  max resid 0.2768694 
## Run 16 stress 0.2087932 
## Run 17 stress 0.2212572 
## Run 18 stress 0.2053057 
## Run 19 stress 0.204233 
## ... Procrustes: rmse 0.03415718  max resid 0.3155705 
## Run 20 stress 0.2206824 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      5: no. of iterations >= maxit
##     15: stress ratio > sratmax

permanova(ps.rare.prop, 'Epileptic.or.Control', 'bray', strata = 'Household')
## PERMANOVA Model: bray~Epileptic.or.Control; Strata: Household
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                      Df SumOfSqs      R2      F  Pr(>F)  
## Epileptic.or.Control  1   0.1584 0.00893 0.8651 0.09049 .
## Residual             96  17.5768 0.99107                 
## Total                97  17.7352 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Epileptic.or.Control', 'bray')
## PERMDISP Model: bray~Epileptic.or.Control
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.00182 0.0018249 0.1221  10000 0.7335
## Residuals 96 1.43422 0.0149398

Weighted-Unifrac distance

plot_ord(ps.rare.prop, 'Epileptic.or.Control','MDS','wunifrac')

plot_ord(ps.rare.prop, 'Epileptic.or.Control','NMDS','wunifrac')
## Run 0 stress 0.1677058 
## Run 1 stress 0.1773885 
## Run 2 stress 0.1662523 
## ... New best solution
## ... Procrustes: rmse 0.04596998  max resid 0.1482302 
## Run 3 stress 0.167717 
## Run 4 stress 0.1658647 
## ... New best solution
## ... Procrustes: rmse 0.0622066  max resid 0.358157 
## Run 5 stress 0.1708145 
## Run 6 stress 0.1699045 
## Run 7 stress 0.1703389 
## Run 8 stress 0.1638635 
## ... New best solution
## ... Procrustes: rmse 0.01674709  max resid 0.1152441 
## Run 9 stress 0.1765167 
## Run 10 stress 0.171215 
## Run 11 stress 0.1739947 
## Run 12 stress 0.1712411 
## Run 13 stress 0.1734899 
## Run 14 stress 0.171567 
## Run 15 stress 0.1691978 
## Run 16 stress 0.1660252 
## Run 17 stress 0.1706175 
## Run 18 stress 0.1727989 
## Run 19 stress 0.1740083 
## Run 20 stress 0.1685239 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      2: no. of iterations >= maxit
##     18: stress ratio > sratmax

permanova(ps.rare.prop, 'Epileptic.or.Control', 'wunifrac', strata = 'Household')
## PERMANOVA Model: wunifrac~Epileptic.or.Control; Strata: Household
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                      Df SumOfSqs      R2      F Pr(>F)  
## Epileptic.or.Control  1  0.04037 0.01437 1.4001 0.0423 *
## Residual             96  2.76786 0.98563                
## Total                97  2.80823 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Epileptic.or.Control', 'wunifrac')
## PERMDISP Model: wunifrac~Epileptic.or.Control
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq     F N.Perm Pr(>F)
## Groups     1 0.00698 0.0069804 1.867  10000 0.1746
## Residuals 96 0.35893 0.0037389

Breed Effect

Alpha Diversity

ps.breed <- ps.rare.prop %>%
  subset_samples(!is.na(Breed.Group..1.) & is.na(Breed.Group..2.))
ps.breed
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2548 taxa and 76 samples ]
## sample_data() Sample Data:       [ 76 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 2548 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2548 tips and 2547 internal nodes ]
## refseq()      DNAStringSet:      [ 2548 reference sequences ]
sam.breed <- data.frame(sample_data(ps.breed))
  
ggplot(sam.breed) +
    geom_point(aes(x = Breed.Group..1., y = Shannon, colour = Breed.Group..1.)) +
    theme(axis.text.x = element_blank(), axis.ticks.x.bottom = element_blank())

anova_test(Shannon~Household + Breed.Group..1., data = sam.breed, type = 1)
## ANOVA Table (type I tests)
## 
##            Effect DFn DFd     F     p p<.05   ges
## 1       Household  40  30 1.667 0.074       0.690
## 2 Breed.Group..1.   5  30 0.505 0.770       0.078

Beta Diversity

Bray-Curtis distance

plot_ord(ps.breed, 'Breed.Group..1.','MDS','bray')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

plot_ord(ps.breed, 'Breed.Group..1.','NMDS','bray')
## Run 0 stress 0.2032089 
## Run 1 stress 0.2056965 
## Run 2 stress 0.2043175 
## Run 3 stress 0.2132212 
## Run 4 stress 0.2147094 
## Run 5 stress 0.2136878 
## Run 6 stress 0.2050429 
## Run 7 stress 0.2026264 
## ... New best solution
## ... Procrustes: rmse 0.03578144  max resid 0.1609715 
## Run 8 stress 0.20483 
## Run 9 stress 0.2126997 
## Run 10 stress 0.2145529 
## Run 11 stress 0.2320328 
## Run 12 stress 0.2054623 
## Run 13 stress 0.2026752 
## ... Procrustes: rmse 0.02116857  max resid 0.1156994 
## Run 14 stress 0.2009081 
## ... New best solution
## ... Procrustes: rmse 0.06633316  max resid 0.2316631 
## Run 15 stress 0.1982802 
## ... New best solution
## ... Procrustes: rmse 0.04418728  max resid 0.2539096 
## Run 16 stress 0.2010472 
## Run 17 stress 0.1984365 
## ... Procrustes: rmse 0.03302231  max resid 0.1413722 
## Run 18 stress 0.2212248 
## Run 19 stress 0.1983042 
## ... Procrustes: rmse 0.03150234  max resid 0.1524821 
## Run 20 stress 0.1979161 
## ... New best solution
## ... Procrustes: rmse 0.01354783  max resid 0.07492009 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      4: no. of iterations >= maxit
##     16: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

permanova(ps.breed, 'Household + Breed.Group..1.', 'bray')
## PERMANOVA Model: bray~Household + Breed.Group..1.; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                 Df SumOfSqs      R2     F    Pr(>F)    
## Household       40   9.6517 0.71619 2.291 9.999e-05 ***
## Breed.Group..1.  5   0.6651 0.04935 1.263    0.1432    
## Residual        30   3.1596 0.23445                    
## Total           75  13.4763 1.00000                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.breed, 'Breed.Group..1.', 'bray')
## PERMDISP Model: bray~Breed.Group..1.
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     6 0.0692 0.011534 0.7323  10000 0.6191
## Residuals 69 1.0868 0.015751

Weighted-Unifrac distance

plot_ord(ps.breed, 'Breed.Group..1.','MDS','wunifrac')
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

plot_ord(ps.breed, 'Breed.Group..1.','NMDS','wunifrac')
## Run 0 stress 0.1708208 
## Run 1 stress 0.1655651 
## ... New best solution
## ... Procrustes: rmse 0.06717692  max resid 0.259309 
## Run 2 stress 0.1695212 
## Run 3 stress 0.1653788 
## ... New best solution
## ... Procrustes: rmse 0.01218192  max resid 0.07788304 
## Run 4 stress 0.1803572 
## Run 5 stress 0.1690298 
## Run 6 stress 0.1667782 
## Run 7 stress 0.1643756 
## ... New best solution
## ... Procrustes: rmse 0.02868352  max resid 0.1110408 
## Run 8 stress 0.1641162 
## ... New best solution
## ... Procrustes: rmse 0.06546367  max resid 0.2741744 
## Run 9 stress 0.1694481 
## Run 10 stress 0.1664819 
## Run 11 stress 0.169577 
## Run 12 stress 0.163923 
## ... New best solution
## ... Procrustes: rmse 0.03862454  max resid 0.176506 
## Run 13 stress 0.1669618 
## Run 14 stress 0.1619331 
## ... New best solution
## ... Procrustes: rmse 0.03175062  max resid 0.1756096 
## Run 15 stress 0.1681841 
## Run 16 stress 0.1694186 
## Run 17 stress 0.162219 
## ... Procrustes: rmse 0.00912344  max resid 0.06310805 
## Run 18 stress 0.1659121 
## Run 19 stress 0.1671885 
## Run 20 stress 0.1621137 
## ... Procrustes: rmse 0.007986208  max resid 0.06120646 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

permanova(ps.breed, 'Household + Breed.Group..1.', 'wunifrac')
## PERMANOVA Model: wunifrac~Household + Breed.Group..1.; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##                 Df SumOfSqs      R2      F    Pr(>F)    
## Household       40  1.52201 0.71072 2.3032 9.999e-05 ***
## Breed.Group..1.  5  0.12387 0.05784 1.4995    0.1014    
## Residual        30  0.49562 0.23144                     
## Total           75  2.14150 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.breed, 'Breed.Group..1.', 'wunifrac')
## PERMDISP Model: wunifrac~Breed.Group..1.
## Warning in betadisper(dist.matrix, group = df[, group]): some squared distances
## are negative and changed to zero
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     6 0.00548 0.0009135 0.1974  10000 0.9796
## Residuals 69 0.31932 0.0046278

Drug Effect

Alpha Diversity

ps.drug <- ps.rare.prop %>% 
  subset_samples(Epileptic.or.Control == 'Epileptic')
sam.drug <- data.frame(sample_data(ps.drug))
ggplot(sam.drug, aes(x = Pheno.Y.N, y = Shannon)) + 
    geom_boxplot() + geom_jitter(height = 0, width = 0.25)

anova_test(Shannon~Pheno.Y.N, data = sam.drug, type = 1)
## ANOVA Table (type I tests)
## 
##      Effect DFn DFd     F     p p<.05   ges
## 1 Pheno.Y.N   1  47 0.917 0.343       0.019

Beta Diversity

Bray-Curtis distance

plot_ord(ps.drug, 'Pheno.Y.N','MDS','bray')

plot_ord(ps.drug, 'Pheno.Y.N','NMDS','bray')
## Run 0 stress 0.2019485 
## Run 1 stress 0.2182105 
## Run 2 stress 0.2045303 
## Run 3 stress 0.2193361 
## Run 4 stress 0.204727 
## Run 5 stress 0.2026006 
## Run 6 stress 0.2282813 
## Run 7 stress 0.2027367 
## Run 8 stress 0.2164618 
## Run 9 stress 0.2303987 
## Run 10 stress 0.2059 
## Run 11 stress 0.2114223 
## Run 12 stress 0.2068373 
## Run 13 stress 0.2052279 
## Run 14 stress 0.2191267 
## Run 15 stress 0.2074245 
## Run 16 stress 0.2082746 
## Run 17 stress 0.2246078 
## Run 18 stress 0.2074868 
## Run 19 stress 0.212465 
## Run 20 stress 0.2082626 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      3: no. of iterations >= maxit
##     17: stress ratio > sratmax

permanova(ps.drug, 'Pheno.Y.N', 'bray')
## PERMANOVA Model: bray~Pheno.Y.N; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F Pr(>F)
## Pheno.Y.N  1   0.1776 0.02081 0.9991 0.4374
## Residual  47   8.3565 0.97919              
## Total     48   8.5341 1.00000
permdisp(ps.drug, 'Pheno.Y.N', 'bray')
## PERMDISP Model: bray~Pheno.Y.N
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq     F N.Perm Pr(>F)
## Groups     1 0.01559 0.015594 1.181  10000 0.2784
## Residuals 47 0.62059 0.013204

Weighted-Unifrac distance

plot_ord(ps.drug, 'Pheno.Y.N','MDS','wunifrac')

plot_ord(ps.drug, 'Pheno.Y.N','NMDS','wunifrac')
## Run 0 stress 0.1787197 
## Run 1 stress 0.1912654 
## Run 2 stress 0.1974609 
## Run 3 stress 0.1909486 
## Run 4 stress 0.1973358 
## Run 5 stress 0.1843501 
## Run 6 stress 0.1811664 
## Run 7 stress 0.4021801 
## Run 8 stress 0.1905269 
## Run 9 stress 0.1960564 
## Run 10 stress 0.1795665 
## Run 11 stress 0.1838092 
## Run 12 stress 0.1829075 
## Run 13 stress 0.185102 
## Run 14 stress 0.1821448 
## Run 15 stress 0.190412 
## Run 16 stress 0.1885809 
## Run 17 stress 0.1822979 
## Run 18 stress 0.1875528 
## Run 19 stress 0.1902957 
## Run 20 stress 0.1821812 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: stress ratio > sratmax

permanova(ps.drug, 'Pheno.Y.N', 'wunifrac')
## PERMANOVA Model: wunifrac~Pheno.Y.N; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F Pr(>F)
## Pheno.Y.N  1  0.01755 0.01429 0.6816 0.7432
## Residual  47  1.21039 0.98571              
## Total     48  1.22794 1.00000
permdisp(ps.drug, 'Pheno.Y.N', 'wunifrac')
## PERMDISP Model: wunifrac~Pheno.Y.N
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.008442 0.0084418 2.7053  10000 0.1051
## Residuals 47 0.146663 0.0031205

Sex Effect

prop.test(xtabs(~Household+Sex, data = sam))
## Warning in prop.test(xtabs(~Household + Sex, data = sam)): Chi-squared
## approximation may be incorrect
## 
##  49-sample test for equality of proportions without continuity
##  correction
## 
## data:  xtabs(~Household + Sex, data = sam)
## X-squared = 52.464, df = 48, p-value = 0.3051
## alternative hypothesis: two.sided
## sample estimates:
##  prop 1  prop 2  prop 3  prop 4  prop 5  prop 6  prop 7  prop 8  prop 9 prop 10 
##     0.5     0.5     0.0     1.0     0.0     1.0     1.0     0.5     1.0     1.0 
## prop 11 prop 12 prop 13 prop 14 prop 15 prop 16 prop 17 prop 18 prop 19 prop 20 
##     0.5     1.0     1.0     1.0     0.5     0.5     1.0     1.0     0.0     0.5 
## prop 21 prop 22 prop 23 prop 24 prop 25 prop 26 prop 27 prop 28 prop 29 prop 30 
##     0.0     0.5     1.0     0.5     0.5     0.0     0.0     0.0     0.5     0.5 
## prop 31 prop 32 prop 33 prop 34 prop 35 prop 36 prop 37 prop 38 prop 39 prop 40 
##     1.0     0.5     1.0     1.0     0.0     0.5     0.5     0.5     0.5     0.5 
## prop 41 prop 42 prop 43 prop 44 prop 45 prop 46 prop 47 prop 48 prop 49 
##     0.0     1.0     1.0     0.5     0.5     1.0     0.5     1.0     0.5

Alpha Diversity

ggplot(sam, aes(x = Sex, y = Shannon)) + 
    geom_boxplot() + geom_jitter(height = 0, width = 0.25)

anova_test(Shannon~Household+Sex, data = sam, type = 1)
## ANOVA Table (type I tests)
## 
##      Effect DFn DFd     F     p p<.05   ges
## 1 Household  48  48 1.634 0.046     * 0.620
## 2       Sex   1  48 2.085 0.155       0.042

Beta Diversity

Bray-Curtis distance

plot_ord(ps.rare.prop, 'Sex','MDS','bray')

plot_ord(ps.rare.prop, 'Sex','NMDS','bray')
## Run 0 stress 0.2089049 
## Run 1 stress 0.2230145 
## Run 2 stress 0.2313955 
## Run 3 stress 0.2047349 
## ... New best solution
## ... Procrustes: rmse 0.02793474  max resid 0.2557777 
## Run 4 stress 0.2042607 
## ... New best solution
## ... Procrustes: rmse 0.04853521  max resid 0.4174251 
## Run 5 stress 0.2046638 
## ... Procrustes: rmse 0.006529649  max resid 0.05485916 
## Run 6 stress 0.2246074 
## Run 7 stress 0.2093822 
## Run 8 stress 0.2104461 
## Run 9 stress 0.2028582 
## ... New best solution
## ... Procrustes: rmse 0.04015983  max resid 0.3241834 
## Run 10 stress 0.2040942 
## Run 11 stress 0.2088666 
## Run 12 stress 0.2040468 
## Run 13 stress 0.2042522 
## Run 14 stress 0.2093865 
## Run 15 stress 0.2049003 
## Run 16 stress 0.2037889 
## Run 17 stress 0.2038654 
## Run 18 stress 0.2031013 
## ... Procrustes: rmse 0.005698808  max resid 0.04047538 
## Run 19 stress 0.2042237 
## Run 20 stress 0.2037019 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      4: no. of iterations >= maxit
##     16: stress ratio > sratmax

permanova(ps.rare.prop, 'Household+Sex', 'bray')
## PERMANOVA Model: bray~Household+Sex; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F    Pr(>F)    
## Household 48  12.2248 0.68930 2.2419 9.999e-05 ***
## Sex        1   0.0575 0.00324 0.5062    0.9813    
## Residual  48   5.4529 0.30746                     
## Total     97  17.7352 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Sex', 'bray')
## PERMDISP Model: bray~Sex
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.01776 0.017760 1.2052  10000 0.2726
## Residuals 96 1.41464 0.014736

Weighted-Unifrac distance

plot_ord(ps.rare.prop, 'Sex','MDS','wunifrac')

plot_ord(ps.rare.prop, 'Sex','NMDS','wunifrac')
## Run 0 stress 0.1677058 
## Run 1 stress 0.1688282 
## Run 2 stress 0.1763593 
## Run 3 stress 0.1753479 
## Run 4 stress 0.1676147 
## ... New best solution
## ... Procrustes: rmse 0.02692823  max resid 0.1579898 
## Run 5 stress 0.1766771 
## Run 6 stress 0.1659421 
## ... New best solution
## ... Procrustes: rmse 0.06233402  max resid 0.3920605 
## Run 7 stress 0.1737135 
## Run 8 stress 0.1665561 
## Run 9 stress 0.1703588 
## Run 10 stress 0.169404 
## Run 11 stress 0.1710069 
## Run 12 stress 0.1688368 
## Run 13 stress 0.166964 
## Run 14 stress 0.1672809 
## Run 15 stress 0.1663042 
## ... Procrustes: rmse 0.05890682  max resid 0.3666806 
## Run 16 stress 0.1706029 
## Run 17 stress 0.175832 
## Run 18 stress 0.1670706 
## Run 19 stress 0.1734538 
## Run 20 stress 0.1661993 
## ... Procrustes: rmse 0.03242898  max resid 0.2115321 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      2: no. of iterations >= maxit
##     18: stress ratio > sratmax

permanova(ps.rare.prop, 'Household+Sex', 'wunifrac')
## PERMANOVA Model: wunifrac~Household+Sex; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##           Df SumOfSqs      R2      F    Pr(>F)    
## Household 48  1.90306 0.67767 2.1118 9.999e-05 ***
## Sex        1  0.00402 0.00143 0.2139    0.9966    
## Residual  48  0.90115 0.32090                     
## Total     97  2.80823 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permdisp(ps.rare.prop, 'Sex', 'wunifrac')
## PERMDISP Model: wunifrac~Sex
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 10000
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups     1 0.00158 0.0015789 0.3971  10000 0.5401
## Residuals 96 0.38166 0.0039757

Age Effect

age <- sam %>% 
    group_by(Household) %>% 
    summarise(age.diff = abs(diff(Age..months./12))) 
age$age.diff %>% hist()

age$age.diff %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   2.000   2.963   5.000  10.000
sum(age$age.diff < 4.5)
## [1] 35

Alpha Diversity

ggplot(sam) +
    geom_line(aes(x = as.numeric(Household), y = Age..months., group = Household)) + 
    geom_point(aes(x = as.numeric(Household), y = Age..months., group = Household, colour = Epileptic.or.Control)) +
    xlab('Household') + ylab('Age in month')

anova_test(Shannon~Household+Age..months., data = sam, type = 1)
## ANOVA Table (type I tests)
## 
##         Effect DFn DFd     F     p p<.05      ges
## 1    Household  48  48 1.566 0.062       0.610000
## 2 Age..months.   1  48 0.010 0.920       0.000215

Beta Diversity

Bray-Curtis distance

permanova(ps.rare.prop, 'Household + Age..months.', 'bray')
## PERMANOVA Model: bray~Household + Age..months.; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##              Df SumOfSqs      R2      F    Pr(>F)    
## Household    48  12.2248 0.68930 2.2618 9.999e-05 ***
## Age..months.  1   0.1056 0.00595 0.9377    0.5228    
## Residual     48   5.4048 0.30475                     
## Total        97  17.7352 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Weighted-Unifrac distance

permanova(ps.rare.prop, 'Household + Age..months.', 'wunifrac')
## PERMANOVA Model: wunifrac~Household + Age..months.; Strata: None
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 10000
## 
## adonis2(formula = model, data = df, permutations = permutations, by = "term", parallel = core, na.action = na.omit, strata = strata)
##              Df SumOfSqs      R2      F    Pr(>F)    
## Household    48  1.90306 0.67767 2.1420 9.999e-05 ***
## Age..months.  1  0.01672 0.00596 0.9036    0.4899    
## Residual     48  0.88844 0.31637                     
## Total        97  2.80823 1.00000                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.4.1 (2024-06-14)
##  os       macOS 15.5
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2025-06-13
##  pandoc   3.5 @ /Users/yixuanyang/miniforge3/bin/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package          * version date (UTC) lib source
##  abind              1.4-8   2024-09-12 [2] CRAN (R 4.4.1)
##  ade4               1.7-22  2023-02-06 [2] CRAN (R 4.4.0)
##  ape                5.8-1   2024-12-16 [2] CRAN (R 4.4.1)
##  backports          1.5.0   2024-05-23 [2] CRAN (R 4.4.0)
##  Biobase            2.66.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  BiocGenerics       0.52.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  biomformat         1.34.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  Biostrings         2.74.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  broom              1.0.7   2024-09-26 [2] CRAN (R 4.4.1)
##  bslib              0.9.0   2025-01-30 [1] CRAN (R 4.4.1)
##  cachem             1.1.0   2024-05-16 [2] CRAN (R 4.4.0)
##  car                3.1-3   2024-09-27 [2] CRAN (R 4.4.1)
##  carData            3.0-5   2022-01-06 [2] CRAN (R 4.4.0)
##  cli                3.6.4   2025-02-13 [2] CRAN (R 4.4.1)
##  cluster            2.1.8   2024-12-11 [2] CRAN (R 4.4.1)
##  codetools          0.2-20  2024-03-31 [2] CRAN (R 4.4.1)
##  colorspace         2.1-1   2024-07-26 [2] CRAN (R 4.4.0)
##  crayon             1.5.3   2024-06-20 [2] CRAN (R 4.4.0)
##  data.table         1.16.4  2024-12-06 [2] CRAN (R 4.4.1)
##  digest             0.6.37  2024-08-19 [2] CRAN (R 4.4.1)
##  dplyr            * 1.1.4   2023-11-17 [2] CRAN (R 4.4.0)
##  evaluate           1.0.3   2025-01-10 [1] CRAN (R 4.4.1)
##  farver             2.1.2   2024-05-13 [2] CRAN (R 4.4.0)
##  fastmap            1.2.0   2024-05-15 [2] CRAN (R 4.4.0)
##  forcats          * 1.0.0   2023-01-29 [2] CRAN (R 4.4.0)
##  foreach            1.5.2   2022-02-02 [2] CRAN (R 4.4.0)
##  Formula            1.2-5   2023-02-24 [2] CRAN (R 4.4.0)
##  generics           0.1.3   2022-07-05 [2] CRAN (R 4.4.0)
##  GenomeInfoDb       1.42.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  GenomeInfoDbData   1.2.13  2024-10-05 [2] Bioconductor
##  ggplot2          * 3.5.1   2024-04-23 [2] CRAN (R 4.4.0)
##  glue               1.8.0   2024-09-30 [2] CRAN (R 4.4.1)
##  gtable             0.3.6   2024-10-25 [2] CRAN (R 4.4.1)
##  here             * 1.0.1   2020-12-13 [2] CRAN (R 4.4.0)
##  hms                1.1.3   2023-03-21 [2] CRAN (R 4.4.0)
##  htmltools          0.5.8.1 2024-04-04 [2] CRAN (R 4.4.0)
##  httr               1.4.7   2023-08-15 [2] CRAN (R 4.4.0)
##  igraph             2.1.2   2024-12-07 [2] CRAN (R 4.4.1)
##  IRanges            2.40.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  iterators          1.0.14  2022-02-05 [2] CRAN (R 4.4.0)
##  jquerylib          0.1.4   2021-04-26 [2] CRAN (R 4.4.0)
##  jsonlite           2.0.0   2025-03-27 [1] CRAN (R 4.4.1)
##  knitr              1.50    2025-03-16 [1] CRAN (R 4.4.1)
##  labeling           0.4.3   2023-08-29 [2] CRAN (R 4.4.0)
##  lattice          * 0.22-6  2024-03-20 [2] CRAN (R 4.4.1)
##  lifecycle          1.0.4   2023-11-07 [2] CRAN (R 4.4.0)
##  lubridate        * 1.9.4   2024-12-08 [2] CRAN (R 4.4.1)
##  magrittr           2.0.3   2022-03-30 [2] CRAN (R 4.4.0)
##  MASS               7.3-61  2024-06-13 [2] CRAN (R 4.4.0)
##  Matrix             1.7-1   2024-10-18 [2] CRAN (R 4.4.1)
##  mgcv               1.9-1   2023-12-21 [2] CRAN (R 4.4.1)
##  multtest           2.62.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  munsell            0.5.1   2024-04-01 [2] CRAN (R 4.4.0)
##  nlme               3.1-166 2024-08-14 [2] CRAN (R 4.4.0)
##  permute          * 0.9-7   2022-01-27 [2] CRAN (R 4.4.0)
##  phyloseq         * 1.50.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  pillar             1.10.2  2025-04-05 [1] CRAN (R 4.4.1)
##  pkgconfig          2.0.3   2019-09-22 [2] CRAN (R 4.4.0)
##  plyr               1.8.9   2023-10-02 [2] CRAN (R 4.4.0)
##  purrr            * 1.0.4   2025-02-05 [2] CRAN (R 4.4.1)
##  R6                 2.6.1   2025-02-15 [1] CRAN (R 4.4.1)
##  Rcpp               1.0.14  2025-01-12 [1] CRAN (R 4.4.1)
##  readr            * 2.1.5   2024-01-10 [2] CRAN (R 4.4.0)
##  reshape2           1.4.4   2020-04-09 [2] CRAN (R 4.4.0)
##  rhdf5              2.49.0  2024-05-04 [2] Bioconductor 3.20 (R 4.4.0)
##  rhdf5filters       1.17.0  2024-05-04 [2] Bioconductor 3.20 (R 4.4.0)
##  Rhdf5lib           1.27.0  2024-05-04 [2] Bioconductor 3.20 (R 4.4.0)
##  rlang              1.1.5   2025-01-17 [2] CRAN (R 4.4.1)
##  rmarkdown          2.29    2024-11-04 [2] CRAN (R 4.4.1)
##  rprojroot          2.0.4   2023-11-05 [2] CRAN (R 4.4.0)
##  rstatix          * 0.7.2   2023-02-01 [2] CRAN (R 4.4.0)
##  rstudioapi         0.17.1  2024-10-22 [2] CRAN (R 4.4.1)
##  S4Vectors          0.44.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  sass               0.4.9   2024-03-15 [2] CRAN (R 4.4.0)
##  scales             1.3.0   2023-11-28 [2] CRAN (R 4.4.0)
##  sessioninfo        1.2.2   2021-12-06 [2] CRAN (R 4.4.0)
##  stringi            1.8.7   2025-03-27 [1] CRAN (R 4.4.1)
##  stringr          * 1.5.1   2023-11-14 [2] CRAN (R 4.4.0)
##  survival           3.8-3   2024-12-17 [2] CRAN (R 4.4.1)
##  tibble           * 3.2.1   2023-03-20 [2] CRAN (R 4.4.0)
##  tidyr            * 1.3.1   2024-01-24 [2] CRAN (R 4.4.0)
##  tidyselect         1.2.1   2024-03-11 [2] CRAN (R 4.4.0)
##  tidyverse        * 2.0.0   2023-02-22 [2] CRAN (R 4.4.0)
##  timechange         0.3.0   2024-01-18 [2] CRAN (R 4.4.0)
##  tzdb               0.4.0   2023-05-12 [2] CRAN (R 4.4.0)
##  UCSC.utils         1.2.0   2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  vctrs              0.6.5   2023-12-01 [2] CRAN (R 4.4.0)
##  vegan            * 2.6-8   2024-08-28 [2] CRAN (R 4.4.1)
##  withr              3.0.2   2024-10-28 [2] CRAN (R 4.4.1)
##  xfun               0.52    2025-04-02 [1] CRAN (R 4.4.1)
##  XVector            0.46.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
##  yaml               2.3.10  2024-07-26 [2] CRAN (R 4.4.0)
##  zlibbioc           1.52.0  2024-10-29 [2] Bioconductor 3.20 (R 4.4.1)
## 
##  [1] /Users/yixuanyang/Library/R/arm64/4.4/library
##  [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────